import re
import jieba
# Location of the stock-name list read by read_stock_names().
# The path is relative, so it is resolved against the process's current
# working directory rather than against this module's location.
file_path = 'StockList/stock_names.txt'
def read_stock_names(path=None):
    """Load the list of known stock names from a text file.

    Every line of the file is treated as one name. Leading and trailing
    whitespace is stripped, and lines that are empty after stripping are
    skipped so that callers never receive an empty name.

    Args:
        path: Optional path of the file to read. When omitted or ``None``,
            the module-level ``file_path`` is used.

    Returns:
        A list of the non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    if path is None:
        path = file_path
    # 'utf-8-sig' reads plain UTF-8 unchanged but also drops a leading
    # byte-order mark; str.strip() does not remove U+FEFF, so with plain
    # 'utf-8' a BOM would stay attached to the first name.
    with open(path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [name for name in stripped_lines if name]

def extract_stock_names(text):
    """Find the known stock names that are mentioned in a piece of text.

    Two strategies are combined: a regular-expression scan of the raw text
    for any known name, and jieba word segmentation whose tokens are kept
    when they are exactly equal to a known name.

    Args:
        text: The string to search.

    Returns:
        A list of the distinct stock names found: segmentation hits first,
        then regex hits, each in order of first appearance. The list is
        empty when ``text`` is empty or when no stock names are known.
    """
    if not text:
        return []

    # Empty entries must be dropped: an empty alternative would make the
    # regex match the empty string at every position of the text.
    known_names = {name for name in read_stock_names() if name}
    if not known_names:
        return []

    # Regex alternation takes the first alternative that matches, so try
    # longer names first; otherwise a short name that is a prefix of a
    # longer one would shadow it. Ties are broken by name so the pattern
    # is built the same way on every call.
    by_length = sorted(known_names, key=lambda name: (-len(name), name))
    pattern = "|".join(map(re.escape, by_length))
    regex_hits = re.findall(pattern, text)

    # Segmentation-based extraction; set membership keeps each check cheap.
    token_hits = [word for word in jieba.lcut(text) if word in known_names]

    # dict.fromkeys removes duplicates while keeping first-seen order, so
    # the result is stable from run to run (unlike list(set(...))).
    return list(dict.fromkeys(token_hits + regex_hits))
